package cetcbigdata.da.service.zhejiang.tongbanqingdan;

import cetcbigdata.da.base.Base;
import cn.wanghaomiao.xpath.model.JXDocument;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Crawler for the Zhejiang government-service item lists.
 *
 * <p>For each configured list category ("topic") and each city/district pair read from the
 * site's region script, it pages through the list endpoint and forwards one record per
 * list entry through {@code processResult} / {@code processItem}.
 */
public class SpiderZheJiang extends Base {
    /** Region label prefix attached to every emitted record. */
    private static final String BASE_REGION = "浙江省";

    /** Module label prefix attached to every emitted record. */
    private static final String BASE_MODULE = "服务事项";

    /** Paged list endpoint (queried with a form POST). */
    private static final String LIST_URL =
            "https://www.zjzwfw.gov.cn/zjservice//matter/list/commonList.do";

    /** Prefix of the detail-page URL; the item's {@code dcode} is appended to it. */
    private static final String DETAIL_URL_PREFIX =
            "https://www.zjzwfw.gov.cn/zjservice//item/detail/index.do?localInnerCode=";

    /** JavaScript file that assigns the region tree to {@code var jsonwebsite}. */
    private static final String REGION_URL =
            "https://zjjcmspublic.oss-cn-hangzhou-zwynet-d01-a.internet.cloud.zj.gov.cn/jcms_files/jcms1/web1/site/script/zjzwfw-2019/new_json2.js";

    /** Number of items requested per list page. */
    private static final int PAGE_SIZE = 10;

    /**
     * Crawls every configured topic for every region.
     *
     * @param recordSender sink handed through to {@code processItem}
     * @param fieldsList   field configuration handed through to {@code processItem}
     */
    @Override
    public void getData(RecordSender recordSender, List<Object> fieldsList) {
        List<JSONObject> topics = new ArrayList<>();
        topics.add(newTopic("服务清单-通办清单", 3));
        topics.add(newTopic("服务清单-马上办事项清单", 6));

        // The region list does not depend on the topic, so it is fetched only once.
        List<JSONObject> regions = getRegionList();
        for (JSONObject topic : topics) {
            parseListPage(recordSender, fieldsList, regions, topic);
        }
    }

    /**
     * Builds a placeholder record by calling {@code processResult} with dummy values.
     *
     * @return the placeholder record, or {@code null} if {@code processResult} throws
     */
    @Override
    public JSONObject getDataDemo() {
        try {
            return processResult("xxx", "xxx", "xxx", "xxx", new JSONObject(), "xxx", "xxx");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Creates a topic descriptor.
     *
     * @param name display name of the list category
     * @param id   value sent as the {@code categoryType} request parameter
     * @return a JSON object with the keys {@code name} and {@code id}
     */
    private static JSONObject newTopic(String name, int id) {
        JSONObject topic = new JSONObject();
        topic.put("name", name);
        topic.put("id", id);
        return topic;
    }

    /**
     * Crawls one topic across all given regions.
     *
     * <p>A failure in one region is reported and does not stop the remaining regions.
     *
     * @param recordSender sink handed through to {@code processItem}
     * @param fieldsList   field configuration handed through to {@code processItem}
     * @param regions      region descriptors as produced by {@link #getRegionList()}
     * @param topic        topic descriptor with the keys {@code name} and {@code id}
     */
    private void parseListPage(RecordSender recordSender, List<Object> fieldsList,
                               List<JSONObject> regions, JSONObject topic) {
        for (JSONObject regionItem : regions) {
            try {
                getPageList(recordSender, fieldsList, regionItem, topic);
            } catch (Exception e) {
                // Boundary catch: keep crawling the other regions.
                System.err.println("Failed to crawl region " + regionItem.getString("name")
                        + " for topic " + topic.getString("name"));
                e.printStackTrace();
            }
        }
    }

    /**
     * Pages through the list endpoint for one region and one topic, emitting every entry.
     *
     * <p>Paging stops at the first page that yields no response or no items.
     * NOTE(review): this assumes the endpoint returns an empty or missing {@code itemList}
     * once the last page has been passed - confirm against the live service.
     *
     * @param recordSender sink handed through to {@code processItem}
     * @param fieldsList   field configuration handed through to {@code processItem}
     * @param regionItem   region descriptor; its {@code id} is sent as {@code jurisCode}
     * @param topic        topic descriptor; its {@code id} is sent as {@code categoryType}
     */
    private static void getPageList(RecordSender recordSender, List<Object> fieldsList,
                                    JSONObject regionItem, JSONObject topic) {
        for (int pageno = 1; ; pageno++) {
            JSONObject params = new JSONObject();
            params.put("jurisCode", regionItem.get("id"));
            params.put("dept", "");
            params.put("pagesize", PAGE_SIZE);
            params.put("pageno", pageno);
            params.put("total", "");
            params.put("categoryType", topic.getInteger("id"));

            String rawResponse = postFormReq(LIST_URL, UtilZheJiang.getHeaders(), params);
            if (rawResponse == null) {
                return;
            }
            // Re-interpret the text as UTF-8; presumably the HTTP helper decodes the body
            // as ISO-8859-1 - TODO confirm.
            String listResponse = new String(
                    rawResponse.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);

            int emitted = parseListPage(
                    LIST_URL, listResponse, recordSender, fieldsList, regionItem, topic);
            if (emitted == 0) {
                return;
            }
        }
    }

    /**
     * Parses one list-page response and emits one record per entry of {@code itemList}.
     *
     * @param listUrl      URL the page was requested from
     * @param listResponse response body as JSON text
     * @param recordSender sink handed through to {@code processItem}
     * @param fieldsList   field configuration handed through to {@code processItem}
     * @param regionItem   region descriptor; its {@code name} goes into the region label
     * @param topic        topic descriptor; its {@code name} goes into the module label
     * @return the number of entries emitted; {@code 0} when the response has no items
     */
    private static int parseListPage(String listUrl, String listResponse,
                                     RecordSender recordSender, List<Object> fieldsList,
                                     JSONObject regionItem, JSONObject topic) {
        JSONObject responseData = JSONObject.parseObject(listResponse);
        if (responseData == null) {
            return 0;
        }
        JSONArray dataList = responseData.getJSONArray("itemList");
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }

        String regionLabel = BASE_REGION + "-" + regionItem.getString("name");
        String moduleLabel = BASE_MODULE + "-" + topic.getString("name");
        for (int i = 0; i < dataList.size(); i++) {
            JSONObject item = dataList.getJSONObject(i);
            String detailUrl = DETAIL_URL_PREFIX + item.getString("dcode");

            JSONObject content = new JSONObject();
            content.put("名称", item.getString("name"));
            content.put("地址", detailUrl);
            printDataCount(50);

            // Hand the record over to the DataX pipeline.
            processItem(
                    processResult(listUrl, listResponse, detailUrl, "",
                            content, regionLabel, moduleLabel),
                    recordSender,
                    fieldsList
            );
        }
        return dataList.size();
    }

    /**
     * Downloads the region script and flattens it into one descriptor per city/district pair.
     *
     * <p>Each descriptor has {@code id} (the district's {@code code}) and {@code name}
     * ({@code "<city name>-<district name>"}). Cities without a {@code childs} array are skipped.
     *
     * @return the region descriptors; an empty list if the script cannot be fetched or parsed
     */
    private static List<JSONObject> getRegionList() {
        JSONObject headers = new JSONObject();
        headers.put("Host", "zjjcmspublic.oss-cn-hangzhou-zwynet-d01-a.internet.cloud.zj.gov.cn");
        headers.put("Referer", "https://www.zjzwfw.gov.cn/");
        headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36");

        try {
            String responseOrg = getReq(REGION_URL, headers, UtilZheJiang.getParams());
            if (responseOrg == null) {
                System.err.println("Region list request returned no body: " + REGION_URL);
                return new ArrayList<>();
            }
            // Strip the JavaScript assignment so that only the JSON array literal remains.
            String response = new String(
                    responseOrg.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8)
                    .replace("var jsonwebsite=", "");

            List<JSONObject> out = new ArrayList<>();
            JSONArray cities = JSONObject.parseArray(response).getJSONObject(0).getJSONArray("childs");
            if (cities == null) {
                return out;
            }
            // Iterate over cities.
            for (int i = 0; i < cities.size(); i++) {
                JSONObject shiItem = cities.getJSONObject(i);
                JSONArray districts = shiItem.getJSONArray("childs");
                if (districts == null) {
                    continue;
                }
                // Iterate over the districts of this city.
                for (int j = 0; j < districts.size(); j++) {
                    JSONObject quItem = districts.getJSONObject(j);
                    JSONObject outItem = new JSONObject();
                    outItem.put("id", quItem.getString("code"));
                    outItem.put("name", shiItem.getString("name") + "-" + quItem.getString("name"));
                    out.add(outItem);
                }
            }
            return out;
        } catch (Exception e) {
            // Best effort: report the failure and let the caller proceed with no regions.
            e.printStackTrace();
            return new ArrayList<>();
        }
    }

    /**
     * Manual entry point: runs the crawl with a {@code null} record sender and no fields.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        SpiderZheJiang spiderZheJiang = new SpiderZheJiang();
        spiderZheJiang.getData(null, new ArrayList<>());
    }
}
